set more off
clear all
cls



****************************************************************************************************
*****************************CONSUMPTION CATEGORIES*************************************************
****************************************************************************************************


import delimited "original_data/consumption_gdp/cons_categories.csv", clear


keep if v6=="Current prices" || v6=="Constant prices, OECD base year"
keep if transact=="P31DC" | transact=="P31CP050"  | transact=="P31CP090" | transact=="P31CP091" | transact=="P31CP092" | transact=="P31CP093" | transact=="P31CP094" | transact=="P31CP095" | transact=="P31CP096" | transact=="B1_GE" 

replace v6="Current" if v6=="Current prices"
replace v6="Constant" if v6=="Constant prices, OECD base year"

*keep if flagcodes==""
gen a=transact+v6
drop country
rename ïlocation country
label variable country "Country code"

drop measure time referenceperiodcode referenceperiod powercodecode flagcodes flags transaction powercode unitcode unit transact v6

reshape wide value, i(country year) j(a) string

rename valueB1_GECurrent GDP_expenditure_current_2_O

label var GDP_expenditure_current_2_O "GDP (OECD, current prices)"

** Note that the "constant" consumption do not sum to cons_rec_total_constant
** Do not use to compute shares

rename valueP31DCCurrent cons_total_current_O
rename valueP31DCConstant cons_total_constant_O

label var cons_total_current_O "Final consumption expenditure of households (OECD, current prices)"
label var cons_total_constant_O "Final consumption expenditure of households (OECD, constant prices)"

rename valueP31CP090Current cons_rec_total_current_O
rename valueP31CP090Constant cons_rec_total_constant_O

label var cons_rec_total_current_O "Consumption: Total recreation and culture (OECD, current prices)"
label var cons_rec_total_constant_O "Consumption: Total recreation and culture (OECD, constant prices)"

rename valueP31CP091Current cons_rec_audio_video_current_O
rename valueP31CP091Constant cons_rec_audio_video_constant_O

label var cons_rec_audio_video_current_O "Consumption: Audio-visual, photographic and information (OECD, current prices)"
label var cons_rec_audio_video_constant_O "Consumption: Audio-visual, photographic and information (OECD, constant prices)"

rename valueP31CP092Current cons_rec_other_dur_current_O
rename valueP31CP092Constant cons_rec_other_dur_constant_O

label var cons_rec_other_dur_current_O "Consumption: Other recreation durables (OECD, current prices)"
label var cons_rec_other_dur_constant_O "Consumption: Other recreation durables (OECD, constant prices)"

rename valueP31CP093Current cons_rec_other_goods_current_O
rename valueP31CP093Constant cons_rec_other_goods_constant_O

label var cons_rec_other_goods_current_O "Consumption: Other recreation items and equipment (OECD, current prices)"
label var cons_rec_other_goods_constant_O "Consumption: Other recreation items and equipment (OECD, constant prices)"

rename valueP31CP094Current cons_rec_services_current_O
rename valueP31CP094Constant cons_rec_services_constant_O

label var cons_rec_services_current_O "Consumption: Recreation services (OECD, current prices)"
label var cons_rec_services_constant_O "Consumption: Recreation services (OECD, constant prices)"

rename valueP31CP095Current cons_rec_books_current_O
rename valueP31CP095Constant cons_rec_books_constant_O

label var cons_rec_books_current_O "Consumption: Newspaper and books (OECD, current prices)"
label var cons_rec_books_constant_O "Consumption: Newspaper and books (OECD, constant prices)"

rename valueP31CP096Current cons_rec_pack_hol_current_O
rename valueP31CP096Constant cons_rec_pack_hol_constant_O

label var cons_rec_pack_hol_current_O "Consumption: Package holidays (OECD, current prices)"
label var cons_rec_pack_hol_constant_O "Consumption: Package holidays (OECD, constant prices)"


rename valueP31CP050Current cons_hh_items_current_O
rename valueP31CP050Constant cons_hh_items_constant_O

label var cons_hh_items_current_O "Consumption: HH items (OECD, current prices)"
label var cons_hh_items_constant_O "Consumption: HH items (OECD, constant prices)"

save temp_v2.dta, replace


******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************
******************************************************************************

import delimited "original_data/consumption_gdp/gdp.csv", clear

keep if v6=="Current prices" || v6=="Constant prices, OECD base year"
keep if transact=="B1_GA" | transact=="B1_GE"  | transact=="P3" | transact=="P31S14" | transact=="P41" | transact=="B1_GI" | transact=="D1" | transact=="D11"

replace v6="Current" if v6=="Current prices"
replace v6="Constant" if v6=="Constant prices, OECD base year"

gen a=transact+v6

drop country
rename ïlocation country
label variable country "Country code"

keep country a year value
reshape wide value, i(country year) j(a) string

rename valueB1_GAConstant GDP_output_constant_O
rename valueB1_GACurrent GDP_output_current_O

label var GDP_output_constant_O "GDP (OECD, output approach, constant prices)"
label var GDP_output_current_O "GDP (OECD, output approach, current prices)"

rename valueB1_GEConstant GDP_expenditure_constant_O
rename valueB1_GECurrent GDP_expenditure_current_O

label var GDP_expenditure_constant "GDP (OECD, expenditure approach, constant prices)"
label var GDP_expenditure_current "GDP (OECD, expenditure approach, current prices)"


*no data in constant prices in the OECD database
rename valueB1_GICurrent GDP_income_current_O
label var GDP_income_current_O "GDP (OECD, income approach, current prices)"

rename valueD1Current employee_compensation_current_O
label var employee_compensation_current_O "Compensation of employees (OECD, current prices)"

rename valueD11Current wages_current_O
label var wages_current_O "Wages and salaries (OECD, current prices)"

rename valueP3Current consumption_current_O
label var consumption_current_O "Final consumption expenditure (OECD, current prices)"

rename valueP3Constant consumption_constant_O
label var consumption_constant_O "Final consumption expenditure (OECD, constant prices)"

rename valueP31S14Current consumption_hh_current_O
label var consumption_hh_current_O "Final consumption expenditure of households (OECD, current prices)"

rename valueP31S14Constant consumption_hh_constant_O
label var consumption_hh_constant_O"Final consumption expenditure of households (OECD, constant prices)"

rename valueP41Current consumption_personal_current_O
label var consumption_personal_current_O "Actual individual consumption (OECD, current prices)"

rename valueP41Constant consumption_personal_constant_O
label var consumption_personal_constant_O "Actual individual consumption (OECD, constant prices)"


merge 1:1 country year using temp_v2.dta
drop _merge


erase temp_v2.dta


*all gdp measures seem to be virtually the same
keep year country GDP_expenditure_current* cons_total_current cons_rec_total_current cons_rec_audio_video_current cons_rec_other_dur_current cons_rec_other_goods_current cons_hh_items_current/*
	*/ cons_rec_services_current cons_rec_books_current cons_rec_pack_hol_current employee_compensation_current cons_rec_total_constant cons_total_constant cons_hh_items_constant/*
	*/ consumption_hh_current_O
	
preserve
	local list audio_video other_dur other_goods services books pack_hol
	foreach var of local list {
		gen ind_`var'=.
		replace ind_`var'=1 if cons_rec_`var'_current!=.
	}
	rename cons_rec_total_current a
	egen aux=rowtotal(cons_rec*)
	egen aux2=rowtotal(ind*)
	replace aux=. if aux2==0 /*if all recreation subcategories are missing, replace sum of parts to missing (otherwise it is zero) */
	drop aux2 ind*
	rename a cons_rec_total_current
	pwcorr aux cons_rec_total_current /*if very high, it means that sum of components equals to aggregate number (turns out to be true, which is good)*/

restore



save consumption_gdp_OECD_mtd.dta, replace






******************************************************************************
******************************************************************************
***********************Eurostat consumption***********************************
******************************************************************************
******************************************************************************


import delimited "original_data/consumption_gdp/eurostat_cons_categories.csv", clear

keep if unit=="CP_MNAC" /*in euros*/
rename time_period year
rename geo country 

drop dataflow freq unit unit obs_flag
rename obs_value value

sort country coicop year


keep if coicop=="TOTAL" | coicop=="CP09" | coicop=="CP091" | coicop=="CP092" | coicop=="CP093" | coicop=="CP094" | coicop=="CP095" | coicop=="CP096" | coicop=="CP05"


replace coicop="cons_total" if coicop=="TOTAL"
replace coicop="cons_rec_total" if coicop=="CP09"
replace coicop="cons_rec_audio_video" if coicop=="CP091"
replace coicop="cons_rec_books" if coicop=="CP095"
replace coicop="cons_rec_other_dur" if coicop=="CP092"
replace coicop="cons_rec_other_goods" if coicop=="CP093"
replace coicop="cons_rec_pack_hol" if coicop=="CP096"
replace coicop="cons_rec_services" if coicop=="CP094"
replace coicop="cons_hh_items" if coicop=="CP05"


reshape wide value, i(country year) j(coicop) string

local list total rec_total rec_audio_video rec_books rec_other_dur rec_other_goods rec_pack_hol rec_services hh_items
foreach var of local list {
	rename valuecons_`var' cons_`var'_ES
}

label var cons_rec_audio_video_ES "Consumption: Audio-visual, photographic and information (ES, current)"
label var cons_rec_books_ES "Consumption: Newspapers and books  (ES, current)"
label var cons_rec_other_dur_ES "Consumption: Other recreation durables (ES, current)"
label var cons_rec_other_goods_ES "Consumption: Other recreation items and equipment (ES, current)"
label var cons_rec_pack_hol_ES "Consumption: Package holidays (ES, current)"
label var cons_rec_services_ES "Consumption: Recreation services (ES, current)"
label var cons_rec_total_ES "Consumption: Total recreation and culture  (ES, current)"
label var cons_total_ES "Consumption: Total consumption (ES, current)"


drop if cons_total_ES==.

rename country country_full
gen country=""


replace country="AUT" if country_full=="AT"
replace country="BEL" if country_full=="BE"
replace country="BGR" if country_full=="BG"
replace country="HRV" if country_full=="HR"
replace country="CYP" if country_full=="CY"
replace country="CZE" if country_full=="CZ"
replace country="DEU" if country_full=="DE"
replace country="DNK" if country_full=="DK"
replace country="EST" if country_full=="EE"
replace country="FIN" if country_full=="FI"
replace country="FRA" if country_full=="FR"
replace country="GRC" if country_full=="EL"
replace country="HUN" if country_full=="HU"
replace country="ISL" if country_full=="IS"
replace country="IRL" if country_full=="IE"
replace country="ITA" if country_full=="IT"
replace country="LVA" if country_full=="LV"
replace country="LTU" if country_full=="LT"
replace country="LUX" if country_full=="LU"
replace country="MKD" if country_full=="MK"
replace country="MLT" if country_full=="MT"
replace country="NLD" if country_full=="NL"
replace country="NOR" if country_full=="NO"
replace country="POL" if country_full=="PL"
replace country="PRT" if country_full=="PT"
replace country="ROU" if country_full=="RO"
replace country="SVK" if country_full=="SK"
replace country="SVN" if country_full=="SI"
replace country="SCG" if country_full=="RS"
replace country="ESP" if country_full=="ES"
replace country="SWE" if country_full=="SE"
replace country="CHE" if country_full=="CH"
replace country="TUR" if country_full=="TR"
replace country="GBR" if country_full=="UK"
replace country="USA" if country_full=="US"
replace country="ALB" if country_full=="AL"
replace country="MNE" if country_full=="ME"
replace country="TUR" if country_full=="TR"
replace country="BIH" if country_full=="BA"

drop if country==""

rename country_full country_short


save eurostat_cons_v2, replace





******************************************************************************
******************************************************************************
***********************Eurostat GDP*******************************************
******************************************************************************
******************************************************************************

import delimited "original_data/consumption_gdp/eurostat_gdp.csv", clear

rename time_period year
rename geo country_full

drop dataflow freq obs_flag
replace unit="current" if unit=="CP_MNAC"
replace unit="volume" if unit=="CLV10_MEUR"

keep if unit=="current"
drop unit

rename obs_value v

replace na_item="GDP_current" if na_item=="B1GQ"
replace na_item="cons_total_current" if na_item=="P31_S14"
replace na_item="employee_compensation_current" if na_item=="D1"
replace na_item="wages_current" if na_item=="D11"


keep if na_item=="GDP_current" | na_item=="cons_total_current" | na_item=="employee_compensation_current" | na_item=="wages_current"


gen country=""
replace country="MNE" if country_full=="ME"
replace country="BIH" if country_full=="BA"
replace country="ALB" if country_full=="AL"
replace country="AUT" if country_full=="AT"
replace country="BEL" if country_full=="BE"
replace country="BGR" if country_full=="BG"
replace country="HRV" if country_full=="HR"
replace country="CYP" if country_full=="CY"
replace country="CZE" if country_full=="CZ"
replace country="DNK" if country_full=="DK"
replace country="EST" if country_full=="EE"
replace country="FIN" if country_full=="FI"
replace country="FRA" if country_full=="FR"
replace country="DEU" if country_full=="DE"
replace country="GRC" if country_full=="EL"
replace country="HUN" if country_full=="HU"
replace country="ISL" if country_full=="IS"
replace country="IRL" if country_full=="IE"
replace country="LIE" if country_full=="LI"
replace country="ITA" if country_full=="IT"
replace country="LVA" if country_full=="LV"
replace country="LTU" if country_full=="LT"
replace country="LUX" if country_full=="LU"
replace country="MKD" if country_full=="MK"
replace country="MLT" if country_full=="MT"
replace country="NLD" if country_full=="NL"
replace country="NOR" if country_full=="NO"
replace country="POL" if country_full=="PL"
replace country="PRT" if country_full=="PT"
replace country="ROU" if country_full=="RO"
replace country="SVK" if country_full=="SK"
replace country="SVN" if country_full=="SI"
replace country="SCG" if country_full=="RS"
replace country="ESP" if country_full=="ES"
replace country="SWE" if country_full=="SE"
replace country="CHE" if country_full=="CH"
replace country="TUR" if country_full=="TR"
replace country="GBR" if country_full=="UK"
drop if country=="" /*should be zero drops*/
drop country_full


reshape wide v, i(country year) j(na_item) string

local list GDP cons_total wages employee_compensation 
foreach var of local list {
	rename v`var'_current `var'_current_ES
}

label var GDP_current_ES "GDP: Eurostat, current"
label var cons_total_current_ES "Household consumption: Eurostat, current"
label var employee_compensation_current_ES "Employee compensation: Eurostat, current"
label var wages_current_ES "Salaries and wages: Eurostat, current"


merge 1:1 year country using eurostat_cons_v2
drop _merge

sort country year

drop cons_total_current_ES /*a similar - although slightly different variable - is in the eurostat_cons*/









******************************************************************************
******************************************************************************
***********************MERGING************************************************
******************************************************************************
******************************************************************************


merge 1:1 year country using consumption_gdp_OECD_mtd


** This makes me uncomfortable, we mix the OECD and EUROSTAT values quite extensively...
** Keep both sets of variables for now
** The variables look similar except for Iceland (at least)

sort country year

gen id_OECD=0
gen id_ES=0
replace id_OECD=1 if cons_total_current_O!=. & year<=2018
replace id_ES=1 if cons_total_ES!=. & year<=2018
by country: egen N_OECD=total(id_OECD)
by country: egen N_ES=total(id_ES)




local list total rec_total rec_audio_video rec_books rec_other_dur rec_other_goods rec_pack_hol rec_services hh_items
foreach var of local list {
	gen fcons_`var'=.
	replace fcons_`var'=cons_`var'_current_O if N_OECD>N_ES
	replace fcons_`var'=cons_`var'_ES if N_OECD<=N_ES
}

drop _merge id_OECD id_ES N_OECD N_ES

gen id_OECD=0
gen id_ES=0
replace id_OECD=1 if consumption_hh_current_O!=. & year<=2018
replace id_ES=1 if cons_total_ES!=. & year<=2018
by country: egen N_OECD=total(id_OECD)
by country: egen N_ES=total(id_ES)
gen fconsumption_total_longer=.
replace fconsumption_total_longer=consumption_hh_current_O if N_OECD>=N_ES
replace fconsumption_total_longer=cons_total_ES if N_OECD<N_ES
drop id_OECD id_ES N_OECD N_ES
label variable fconsumption_total_longer "Longer total consumption series"


*employee compensation and GDPs are highly correlated, so just select ones with more observations
gen id_OECD=0
gen id_ES=0
replace id_OECD=1 if employee_compensation_current_O!=. & year<=2018
replace id_ES=1 if employee_compensation_current_ES!=. & year<=2018
by country: egen N_OECD=total(id_OECD)
by country: egen N_ES=total(id_ES)
gen femployee_compensation_current=.
replace femployee_compensation_current=employee_compensation_current_O if N_OECD>N_ES
replace femployee_compensation_current=employee_compensation_current_ES if N_OECD<=N_ES
drop id_OECD id_ES N_OECD N_ES

gen id_OECD=0
gen id_ES=0
replace id_OECD=1 if GDP_expenditure_current_O!=. & year<=2018
replace id_ES=1 if GDP_current_ES!=. & year<=2018
by country: egen N_OECD=total(id_OECD)
by country: egen N_ES=total(id_ES)
gen fGDP_current=.
replace fGDP_current=GDP_expenditure_current_O if N_OECD>N_ES
replace fGDP_current=GDP_current_ES if N_OECD<=N_ES
drop id_OECD id_ES N_OECD N_ES





save consumption_gdp.dta, replace

erase consumption_gdp_OECD_mtd.dta
erase eurostat_cons_v2.dta
